From eee7931c831b189bb77f59a3d1b52668ddfc4d32 Mon Sep 17 00:00:00 2001 From: "kfraser@localhost.localdomain" Date: Tue, 20 Feb 2007 09:33:14 +0000 Subject: [PATCH] linux/x86: Adjust and fix page table handling. Ensure that all and only those page table entries that have their present bit set undergo p2m/m2p translation in all relevant places. This should fix migration issues with _PAGE_PROTNONE pages which previously could retain MFNs in PTEs while having the present bit clear (and thus were not getting (un)canonicalized during save/ restore). Many thanks to Keir Fraser for his analysis, suggestions, and corrections. Signed-off-by: Jan Beulich --- .../include/asm-i386/mach-xen/asm/maddr.h | 25 +++++++-- .../include/asm-i386/mach-xen/asm/page.h | 51 ++++++++++--------- .../asm-i386/mach-xen/asm/pgtable-2level.h | 7 ++- .../asm-i386/mach-xen/asm/pgtable-3level.h | 15 +++--- .../include/asm-i386/mach-xen/asm/pgtable.h | 19 +++---- .../include/asm-x86_64/mach-xen/asm/maddr.h | 15 ++++-- .../include/asm-x86_64/mach-xen/asm/page.h | 28 +++++++--- .../include/asm-x86_64/mach-xen/asm/pgtable.h | 38 ++++++++------ 8 files changed, 127 insertions(+), 71 deletions(-) diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/maddr.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/maddr.h index f805d6ea60..1d345898cc 100644 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/maddr.h +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/maddr.h @@ -21,6 +21,7 @@ typedef unsigned long maddr_t; #ifdef CONFIG_XEN extern unsigned long *phys_to_machine_mapping; +extern unsigned long max_mapnr; #undef machine_to_phys_mapping extern unsigned long *machine_to_phys_mapping; @@ -30,20 +31,20 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn) { if (xen_feature(XENFEAT_auto_translated_physmap)) return pfn; - return phys_to_machine_mapping[(unsigned int)(pfn)] & - ~FOREIGN_FRAME_BIT; + BUG_ON(max_mapnr && pfn >= max_mapnr); + return phys_to_machine_mapping[pfn] & ~FOREIGN_FRAME_BIT; } static inline int phys_to_machine_mapping_valid(unsigned long pfn) { if (xen_feature(XENFEAT_auto_translated_physmap)) return 1; + BUG_ON(max_mapnr && pfn >= max_mapnr); return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); } static inline unsigned long mfn_to_pfn(unsigned long mfn) { - extern unsigned long max_mapnr; unsigned long pfn; if (xen_feature(XENFEAT_auto_translated_physmap)) @@ -92,7 +93,6 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) */ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) { - extern unsigned long max_mapnr; unsigned long pfn = mfn_to_pfn(mfn); if ((pfn < max_mapnr) && !xen_feature(XENFEAT_auto_translated_physmap) @@ -103,6 +103,7 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn) { + BUG_ON(pfn >= max_mapnr); if (xen_feature(XENFEAT_auto_translated_physmap)) { BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); return; @@ -124,6 +125,20 @@ static inline paddr_t machine_to_phys(maddr_t machine) return phys; } +#ifdef CONFIG_X86_PAE +static inline paddr_t pte_phys_to_machine(paddr_t phys) +{ + /* + * In PAE mode, the NX bit needs to be dealt with in the value + * passed to pfn_to_mfn(). On x86_64, we need to mask it off, + * but for i386 the conversion to ulong for the argument will + * clip it off. + */ + maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT); + machine = (machine << PAGE_SHIFT) | (phys & ~PHYSICAL_PAGE_MASK); + return machine; +} + static inline paddr_t pte_machine_to_phys(maddr_t machine) { /* @@ -136,6 +151,7 @@ static inline paddr_t pte_machine_to_phys(maddr_t machine) phys = (phys << PAGE_SHIFT) | (machine & ~PHYSICAL_PAGE_MASK); return phys; } +#endif #else /* !CONFIG_XEN */ @@ -146,7 +162,6 @@ static inline paddr_t pte_machine_to_phys(maddr_t machine) #define phys_to_machine_mapping_valid(pfn) (1) #define phys_to_machine(phys) ((maddr_t)(phys)) #define machine_to_phys(mach) ((paddr_t)(mach)) -#define pte_machine_to_phys(mach) ((paddr_t)(mach)) #endif /* !CONFIG_XEN */ diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h index 1525dbfca4..7edb3e7ccf 100644 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h @@ -29,6 +29,13 @@ #include #include +/* + * Need to repeat this here in order to not include pgtable.h (which in turn + * depends on definitions made here), but to be able to use the symbolic + * below. The preprocessor will warn if the two definitions aren't identical. + */ +#define _PAGE_PRESENT 0x001 + #define arch_free_page(_page,_order) \ ({ int foreign = PageForeign(_page); \ if (foreign) \ @@ -81,40 +88,38 @@ typedef struct { unsigned long long pgprot; } pgprot_t; #define pgprot_val(x) ((x).pgprot) #include #define __pte(x) ({ unsigned long long _x = (x); \ - if (_x & 1) _x = phys_to_machine(_x); \ + if (_x & _PAGE_PRESENT) _x = pte_phys_to_machine(_x); \ ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); }) #define __pgd(x) ({ unsigned long long _x = (x); \ - (((_x)&1) ? ((pgd_t) {phys_to_machine(_x)}) : ((pgd_t) {(_x)})); }) + (pgd_t) {((_x) & _PAGE_PRESENT) ? pte_phys_to_machine(_x) : (_x)}; }) #define __pmd(x) ({ unsigned long long _x = (x); \ - (((_x)&1) ? ((pmd_t) {phys_to_machine(_x)}) : ((pmd_t) {(_x)})); }) + (pmd_t) {((_x) & _PAGE_PRESENT) ? pte_phys_to_machine(_x) : (_x)}; }) +static inline unsigned long long pte_val_ma(pte_t x) +{ + return ((unsigned long long)x.pte_high << 32) | x.pte_low; +} static inline unsigned long long pte_val(pte_t x) { - unsigned long long ret; - - if (x.pte_low) { - ret = x.pte_low | (unsigned long long)x.pte_high << 32; - ret = pte_machine_to_phys(ret) | 1; - } else { - ret = 0; - } + unsigned long long ret = pte_val_ma(x); + if (x.pte_low & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); return ret; } static inline unsigned long long pmd_val(pmd_t x) { unsigned long long ret = x.pmd; - if (ret) ret = pte_machine_to_phys(ret) | 1; +#ifdef CONFIG_XEN_COMPAT_030002 + if (ret) ret = pte_machine_to_phys(ret) | _PAGE_PRESENT; +#else + if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); +#endif return ret; } static inline unsigned long long pgd_val(pgd_t x) { unsigned long long ret = x.pgd; - if (ret) ret = pte_machine_to_phys(ret) | 1; + if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); return ret; } -static inline unsigned long long pte_val_ma(pte_t x) -{ - return (unsigned long long)x.pte_high << 32 | x.pte_low; -} #define HPAGE_SHIFT 21 #else typedef struct { unsigned long pte_low; } pte_t; @@ -123,23 +128,23 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define pgprot_val(x) ((x).pgprot) #include #define boot_pte_t pte_t /* or would you rather have a typedef */ -#define pte_val(x) (((x).pte_low & 1) ? \ - pte_machine_to_phys((x).pte_low) : \ +#define pte_val(x) (((x).pte_low & _PAGE_PRESENT) ? \ + machine_to_phys((x).pte_low) : \ (x).pte_low) #define pte_val_ma(x) ((x).pte_low) #define __pte(x) ({ unsigned long _x = (x); \ - (((_x)&1) ? ((pte_t) {phys_to_machine(_x)}) : ((pte_t) {(_x)})); }) + (pte_t) {((_x) & _PAGE_PRESENT) ? phys_to_machine(_x) : (_x)}; }) #define __pgd(x) ({ unsigned long _x = (x); \ - (((_x)&1) ? ((pgd_t) {phys_to_machine(_x)}) : ((pgd_t) {(_x)})); }) + (pgd_t) {((_x) & _PAGE_PRESENT) ? phys_to_machine(_x) : (_x)}; }) static inline unsigned long pgd_val(pgd_t x) { unsigned long ret = x.pgd; - if (ret) ret = pte_machine_to_phys(ret) | 1; + if (ret & _PAGE_PRESENT) ret = machine_to_phys(ret); return ret; } #define HPAGE_SHIFT 22 #endif -#define PTE_MASK PAGE_MASK +#define PTE_MASK PHYSICAL_PAGE_MASK #ifdef CONFIG_HUGETLB_PAGE #define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h index 34cb67f06f..f7250a3597 100644 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h @@ -38,8 +38,11 @@ #define ptep_get_and_clear(mm,addr,xp) __pte_ma(xchg(&(xp)->pte_low, 0)) #define pte_same(a, b) ((a).pte_low == (b).pte_low) -#define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT) -#define pte_pfn(_pte) mfn_to_local_pfn(pte_mfn(_pte)) +#define __pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT) +#define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \ + __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte))) +#define pte_pfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \ + mfn_to_local_pfn(__pte_mfn(_pte)) : __pte_mfn(_pte)) #define pte_page(_pte) pfn_to_page(pte_pfn(_pte)) diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h index 29b4036a72..7acdccc484 100644 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h @@ -145,21 +145,24 @@ static inline int pte_none(pte_t pte) return !pte.pte_low && !pte.pte_high; } -#define pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) |\ - (((_pte).pte_high & 0xfff) << (32-PAGE_SHIFT))) -#define pte_pfn(_pte) mfn_to_local_pfn(pte_mfn(_pte)) +#define __pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \ + ((_pte).pte_high << (32-PAGE_SHIFT))) +#define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \ + __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte))) +#define pte_pfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \ + mfn_to_local_pfn(__pte_mfn(_pte)) : __pte_mfn(_pte)) extern unsigned long long __supported_pte_mask; static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { - return pfn_pte_ma(pfn_to_mfn(page_nr), pgprot); + return __pte((((unsigned long long)page_nr << PAGE_SHIFT) | + pgprot_val(pgprot)) & __supported_pte_mask); } static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - BUG(); panic("needs review"); - return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | \ + return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | pgprot_val(pgprot)) & __supported_pte_mask); } diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h index 732dfa7df2..5be42a9de5 100644 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h @@ -315,18 +315,19 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - pte.pte_low &= _PAGE_CHG_MASK; - pte.pte_low |= pgprot_val(newprot); -#ifdef CONFIG_X86_PAE /* - * Chop off the NX bit (if present), and add the NX portion of - * the newprot (if present): + * Since this might change the present bit (which controls whether + * a pte_t object has undergone p2m translation), we must use + * pte_val() on the input pte and __pte() for the return value. */ - pte.pte_high &= ~(1 << (_PAGE_BIT_NX - 32)); - pte.pte_high |= (pgprot_val(newprot) >> 32) & \ - (__supported_pte_mask >> 32); + paddr_t pteval = pte_val(pte); + + pteval &= _PAGE_CHG_MASK; + pteval |= pgprot_val(newprot); +#ifdef CONFIG_X86_PAE + pteval &= __supported_pte_mask; #endif - return pte; + return __pte(pteval); } #define pmd_large(pmd) \ diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/maddr.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/maddr.h index 77544b3ca2..249b95965f 100644 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/maddr.h +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/maddr.h @@ -25,14 +25,15 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn) { if (xen_feature(XENFEAT_auto_translated_physmap)) return pfn; - return phys_to_machine_mapping[(unsigned int)(pfn)] & - ~FOREIGN_FRAME_BIT; + BUG_ON(end_pfn && pfn >= end_pfn); + return phys_to_machine_mapping[pfn] & ~FOREIGN_FRAME_BIT; } static inline int phys_to_machine_mapping_valid(unsigned long pfn) { if (xen_feature(XENFEAT_auto_translated_physmap)) return 1; + BUG_ON(end_pfn && pfn >= end_pfn); return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); } @@ -96,6 +97,7 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn) { + BUG_ON(pfn >= end_pfn); if (xen_feature(XENFEAT_auto_translated_physmap)) { BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); return; @@ -117,6 +119,14 @@ static inline paddr_t machine_to_phys(maddr_t machine) return phys; } +static inline paddr_t pte_phys_to_machine(paddr_t phys) +{ + maddr_t machine; + machine = pfn_to_mfn((phys & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT); + machine = (machine << PAGE_SHIFT) | (phys & ~PHYSICAL_PAGE_MASK); + return machine; +} + static inline paddr_t pte_machine_to_phys(maddr_t machine) { paddr_t phys; @@ -134,7 +144,6 @@ static inline paddr_t pte_machine_to_phys(maddr_t machine) #define phys_to_machine_mapping_valid(pfn) (1) #define phys_to_machine(phys) ((maddr_t)(phys)) #define machine_to_phys(mach) ((paddr_t)(mach)) -#define pte_machine_to_phys(mach) ((paddr_t)(mach)) #endif /* !CONFIG_XEN */ diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h index 8c8e021316..6340e6e6d6 100644 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h @@ -9,6 +9,13 @@ #endif #include +/* + * Need to repeat this here in order to not include pgtable.h (which in turn + * depends on definitions made here), but to be able to use the symbolic + * below. The preprocessor will warn if the two definitions aren't identical. + */ +#define _PAGE_PRESENT 0x001 + #define arch_free_page(_page,_order) \ ({ int foreign = PageForeign(_page); \ if (foreign) \ @@ -95,28 +102,33 @@ typedef struct { unsigned long pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; -#define pte_val(x) (((x).pte & 1) ? pte_machine_to_phys((x).pte) : \ +#define pte_val(x) (((x).pte & _PAGE_PRESENT) ? \ + pte_machine_to_phys((x).pte) : \ (x).pte) #define pte_val_ma(x) ((x).pte) static inline unsigned long pmd_val(pmd_t x) { unsigned long ret = x.pmd; - if (ret) ret = pte_machine_to_phys(ret); +#ifdef CONFIG_XEN_COMPAT_030002 + if (ret) ret = pte_machine_to_phys(ret) | _PAGE_PRESENT; +#else + if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); +#endif return ret; } static inline unsigned long pud_val(pud_t x) { unsigned long ret = x.pud; - if (ret) ret = pte_machine_to_phys(ret); + if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); return ret; } static inline unsigned long pgd_val(pgd_t x) { unsigned long ret = x.pgd; - if (ret) ret = pte_machine_to_phys(ret); + if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); return ret; } @@ -124,25 +136,25 @@ static inline unsigned long pgd_val(pgd_t x) static inline pte_t __pte(unsigned long x) { - if (x & 1) x = phys_to_machine(x); + if (x & _PAGE_PRESENT) x = pte_phys_to_machine(x); return ((pte_t) { (x) }); } static inline pmd_t __pmd(unsigned long x) { - if ((x & 1)) x = phys_to_machine(x); + if (x & _PAGE_PRESENT) x = pte_phys_to_machine(x); return ((pmd_t) { (x) }); } static inline pud_t __pud(unsigned long x) { - if ((x & 1)) x = phys_to_machine(x); + if (x & _PAGE_PRESENT) x = pte_phys_to_machine(x); return ((pud_t) { (x) }); } static inline pgd_t __pgd(unsigned long x) { - if ((x & 1)) x = phys_to_machine(x); + if (x & _PAGE_PRESENT) x = pte_phys_to_machine(x); return ((pgd_t) { (x) }); } diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h index b3dd8f2b2f..5aa07770ef 100644 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h @@ -302,19 +302,20 @@ static inline unsigned long pud_bad(pud_t pud) #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) -#define pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT) -#define pte_pfn(_pte) mfn_to_local_pfn(pte_mfn(_pte)) +#define __pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT) +#define pte_mfn(_pte) ((_pte).pte & _PAGE_PRESENT ? \ + __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte))) +#define pte_pfn(_pte) ((_pte).pte & _PAGE_PRESENT ? \ + mfn_to_local_pfn(__pte_mfn(_pte)) : __pte_mfn(_pte)) #define pte_page(x) pfn_to_page(pte_pfn(x)) static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { - pte_t pte; - - (pte).pte = (pfn_to_mfn(page_nr) << PAGE_SHIFT); - (pte).pte |= pgprot_val(pgprot); - (pte).pte &= __supported_pte_mask; - return pte; + unsigned long pte = page_nr << PAGE_SHIFT; + pte |= pgprot_val(pgprot); + pte &= __supported_pte_mask; + return __pte(pte); } /* @@ -446,18 +447,25 @@ static inline pud_t *pud_offset_k(pgd_t *pgd, unsigned long address) /* physical address -> PTE */ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { - pte_t pte; - (pte).pte = physpage | pgprot_val(pgprot); - return pte; + unsigned long pteval; + pteval = physpage | pgprot_val(pgprot); + return __pte(pteval); } /* Change flags of a PTE */ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - (pte).pte &= _PAGE_CHG_MASK; - (pte).pte |= pgprot_val(newprot); - (pte).pte &= __supported_pte_mask; - return pte; + /* + * Since this might change the present bit (which controls whether + * a pte_t object has undergone p2m translation), we must use + * pte_val() on the input pte and __pte() for the return value. + */ + unsigned long pteval = pte_val(pte); + + pteval &= _PAGE_CHG_MASK; + pteval |= pgprot_val(newprot); + pteval &= __supported_pte_mask; + return __pte(pteval); } #define pte_index(address) \ -- 2.30.2